#######################################################################
########REPLICATION FILE: RACIAL ISOLATION DRIVES RACIAL VOTING########
########################POLITICAL BEHAVIOR#############################
####################MELISSA SANDS, DANIEL DE KADT######################
#######################################################################

####################################################
############## TITLE DEEDS ANALYSIS ################
####################################################

# Start from an empty global workspace (hidden, dot-prefixed objects included).
rm(list = ls(all.names = TRUE))

library(foreign)
library(sandwich)
library(lmtest)
library(stargazer)
library(readstata13)

##### Clustered SEs Function ##### 
vcovCluster <- function(fm, clvar){
  # One-way cluster-robust variance-covariance matrix for a fitted model.
  # Adapted from R code by Mahmood Arai (Jan 26, 2008).
  #
  # Arguments:
  #   fm    - fitted model. It must have been fitted with a `data` argument,
  #           because the cluster variable is looked up in that data object.
  #   clvar - column of the model's data that identifies the clusters
  #           (passed straight to `[`, e.g. a column name such as "cat_b").
  #
  # Returns (visibly) the clustered variance-covariance matrix, with the
  # finite-sample correction (M/(M-1)) * ((N-1)/(N-K)), where M is the number
  # of clusters, N the number of observations used and K the number of
  # estimated parameters.
  #
  # Requires the `sandwich` package (and `MASS` for polr models); functions are
  # called through their namespace so nothing is attached as a side effect.

  # Re-evaluate the model's `data` argument in the caller's environment.
  x <- eval(fm$call$data, envir = parent.frame())
  if (is.null(x)) {
    stop("vcovCluster: the model must be fitted with a `data` argument",
         call. = FALSE)
  }

  # Keep only the cluster ids of the rows that entered the fit; rows dropped
  # by the model (e.g. for missing values) do not appear in predict().
  if (inherits(fm, "polr")) {
    if (!requireNamespace("MASS", quietly = TRUE)) {
      stop("vcovCluster: package 'MASS' is required for polr models",
           call. = FALSE)
    }
    cluster <- x[rownames(predict(fm, type = "probs")), clvar]
  } else {
    cluster <- x[names(predict(fm)), clvar]
  }
  if (anyNA(cluster)) {
    stop("vcovCluster: cluster variable has missing values for fitted rows",
         call. = FALSE)
  }

  M <- length(unique(cluster))
  N <- length(cluster)
  K <- nrow(vcov(fm))
  if (M < 2) {
    # The correction below divides by (M - 1).
    stop("vcovCluster: at least two clusters are required", call. = FALSE)
  }
  dfc <- (M/(M-1))*((N-1)/(N-K))

  # Sum the estimating functions (scores) within each cluster, column by column.
  uj <- apply(sandwich::estfun(fm), 2, function(score) tapply(score, cluster, sum))
  vcovCL <- dfc*sandwich::sandwich(fm, meat=crossprod(uj)/N)
  vcovCL
}

# Switch to the project root when one of the authors' Dropbox folders exists on
# this machine. An unconditional setwd() on a folder that does not exist is an
# error and would stop the script, so each candidate is checked first. If none
# exists, the current working directory is kept: run the script from the folder
# that contains `replication_archive`.
project_dirs <- c(
  "C:\\Users\\ddeka\\Dropbox\\South_Africa_segregation",
  "C:\\Users\\Melissa Sands\\Dropbox\\South_Africa_segregation"
)
for (project_dir in project_dirs) {
  if (dir.exists(project_dir)) {
    setwd(project_dir)
  }
}

# Aggregate title-deeds data used for Tables I.16 and I.17.
# file.path() builds the path with a separator that works on every platform.
subag2 <- read.csv(
  file.path("replication_archive", "data", "titledeeds_aggregate_data.csv"),
  stringsAsFactors = FALSE
)

##### TABLE I.16 #####
# Four OLS models of white_iso2011 on subag2. Each includes the 1991 race
# fractions and cat_b dummies; the "x" variants add totalpp as a regressor.
# reg2/reg2x use log_mean_tri, reg3/reg3x use nn_lra.
reg2 <- lm(white_iso2011 ~ log_mean_tri + white_frac1991 + black_frac1991 + colored_frac1991 + as.factor(cat_b), data = subag2)
reg2x <- lm(white_iso2011 ~ totalpp + log_mean_tri + white_frac1991 + black_frac1991 + colored_frac1991 + as.factor(cat_b), data = subag2)

# Variance-covariance matrices clustered on cat_b.
se2 <- vcovCluster(reg2, "cat_b")
se2x <- vcovCluster(reg2x, "cat_b")

reg3 <- lm(white_iso2011 ~ nn_lra + white_frac1991 + black_frac1991 + colored_frac1991 + as.factor(cat_b), data = subag2)
reg3x <- lm(white_iso2011 ~ totalpp + nn_lra + white_frac1991 + black_frac1991 + colored_frac1991 + as.factor(cat_b), data = subag2)

se3 <- vcovCluster(reg3, "cat_b")
se3x <- vcovCluster(reg3x, "cat_b")

# stargazer's `se` argument takes a list of standard-error vectors (matched to
# coefficients by name), not covariance matrices, so convert each clustered
# matrix to sqrt(diag(.)) before passing it.
stargazer(reg2, reg2x, reg3, reg3x,
          se = lapply(list(se2, se2x, se3, se3x), function(v) sqrt(diag(v))),
          omit = c("cat_b", "white_frac1991", "black_frac1991", "colored_frac1991", "Constant"),
          omit.stat = c("ser", "f"))

##### TABLE I.17 #####
# OLS models of totalpp on subag2 with the 1991 race fractions and cat_b
# dummies; reg4 uses log_mean_tri, reg5 uses nn_lra. coeftest() reports each
# model with variances clustered on cat_b.
reg4 <- lm(totalpp ~ log_mean_tri + white_frac1991 + black_frac1991 + colored_frac1991 + as.factor(cat_b), data = subag2)
coeftest(reg4, vcov = vcovCluster(reg4, "cat_b"))

reg5 <- lm(totalpp ~ nn_lra + white_frac1991 + black_frac1991 + colored_frac1991 + as.factor(cat_b), data = subag2)
coeftest(reg5, vcov = vcovCluster(reg5, "cat_b"))

# Clustered variance-covariance matrices. Each one is computed from its own
# model; an earlier stray `se4` built from reg2 was overwritten before use and
# has been removed.
se4 <- vcovCluster(reg4, "cat_b")
se5 <- vcovCluster(reg5, "cat_b")

# stargazer's `se` argument takes a list of standard-error vectors, not
# covariance matrices, so pass sqrt(diag(.)) of each clustered matrix.
stargazer(reg4, reg5,
          se = lapply(list(se4, se5), function(v) sqrt(diag(v))),
          omit = c("cat_b", "white_frac1991", "black_frac1991", "colored_frac1991", "Constant"),
          omit.stat = c("ser", "f"))

##### TABLE I.15 ##### 
# For Table I.15, the data are not available in this replication file as they are proprietary and the analysis is conducted at the EA level, too low for release.
# If you would like to acquire the data, contact The Gauteng City Region Observatory (GCRO, our contacts were Chris Wray and Samy Katumba), and request 
# the "Lightstone data on historical home sales in Gauteng." The GCRO should put you in touch with Lightstone Property, who own the data. 
# You will need to sign an NDA to access the data. If you have the data, you will need to merge the sales data with the 1991 census data at the EA level. 
# You will need to have a long 1991 EA level dataset with home sales for each year. The following code will replicate the analyses that produce Table I.15.

# Path to your merged EA-level data file (CSV). The placeholder is a string so
# that the script still parses; replace it with the real path before running
# the Table I.15 section.
ea_data_file <- "YOUR DATA FILE HERE"
if (!file.exists(ea_data_file)) {
  stop("Table I.15: set `ea_data_file` to the path of your merged EA-level data file",
       call. = FALSE)
}

d <- read.csv(ea_data_file, stringsAsFactors = FALSE)
d <- subset(d, d$ID != " " & d$TOT_POP != 0) # delete missing EA IDs and rows with zero population
d93 <- subset(d, d$YEAR %in% 1993:1999) # authors' note: 6118 (4510 populated EAs)

# calculate race proportions (TOT_POP is non-zero after the filter above)
d93$blackfrac <- d93$BLACKS / d93$TOT_POP
d93$whitefrac <- d93$WHITES / d93$TOT_POP
d93$coloredfrac <- d93$COLRDS / d93$TOT_POP
d93$asianfrac <- d93$ASIANS / d93$TOT_POP

## other controls
d93$prop_formal_dwell <- d93$DW_FORMAL / d93$DW_TOTAL
d93$prop_owned <- d93$OWNED / d93$DW_TOTAL

# set to 0 where HH_FORMAL is not positive, to avoid dividing by zero
d93$transfers_per_formalHH <- ifelse(d93$HH_FORMAL > 0, d93$total / d93$HH_FORMAL, 0)

d93$afrikaans <- d93$AFR / d93$TOT_POP
d93$english <- d93$ENG / d93$TOT_POP
d93$jewish <- d93$JEW / d93$TOT_POP

# calculate transfers per person
d93$transferspp <- d93$total / d93$TOT_POP

## Table I.15 regressions: transferspp on d93
## NOTE: reg2 and reg3 reuse object names assigned earlier in this script.

# Model 1: white, colored and asian fractions only.
reg1 <- lm(transferspp ~ whitefrac + coloredfrac + asianfrac, data = d93)
summary(reg1)

# Model 2: adds the afrikaans fraction.
reg2 <- lm(transferspp ~ whitefrac + coloredfrac + asianfrac + afrikaans, data = d93)
summary(reg2)

# Model 3: adds HOUSEH_INC, POP_DENS and prop_owned.
reg3 <- lm(transferspp ~ whitefrac + coloredfrac + asianfrac + afrikaans + HOUSEH_INC + POP_DENS + prop_owned, data = d93)
summary(reg3)

# Regression table for the three models, without residual SE and F statistic.
stargazer(reg1, reg2, reg3, omit.stat = c("ser", "f"))
